import requests
import re

from requests.api import get


# Fetch one Dianping listing page for `city` and save the raw HTML to disk.
city = 'guangzhou'
url = 'http://www.dianping.com/' + city + '/ch10'
headers = {
    # Send a desktop Chrome User-Agent instead of the default one from requests.
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
}

# requests applies no timeout unless one is given, so bound the call to keep
# an unresponsive server from hanging the script indefinitely.
response = requests.get(url, headers=headers, timeout=10)
print(response.status_code)
# Keep a copy of the page body; the target directory must already exist.
with open('./spider/dazhongdp/dazhongdianping_food/response_text', 'w', encoding='utf-8') as f:
    f.write(response.text)
    
def _first_match(pattern, text, flags=0):
    """Return the first ``re.findall`` hit for *pattern* in *text*, or None."""
    hits = re.findall(pattern, text, flags)
    return hits[0] if hits else None


def get_data(html=None):
    """Extract shop details from a listing page, print them and return them.

    Args:
        html: Page markup to parse. When omitted, the body of the
            module-level ``response`` is used.

    Returns:
        A list with one dict per matched shop entry, holding the keys
        ``shop_name``, ``score``, ``num_comments`` and ``avg_price``.
        A field whose pattern does not match is stored as ``'null'``.
    """
    if html is None:
        html = response.text
    flags = re.S | re.M
    shops = []
    entries = re.findall(r'<li class="" >(.*?)<div class="operate J_operate Hide">', html, flags)
    for info in entries:
        shop_name = _first_match(r'<h4>(.*?)<\/h4>', info, flags)
        # Take the first hit like every other field, so a missing score
        # becomes 'null' instead of an empty list.
        score = _first_match(r'<div class="star_score score_.*?star_score_sml">(.*?)</div>', info)

        num_comments = None
        raw_comments = _first_match(
            r'LXAnalytics\(\'moduleClick\', \'shopreview\'\).*?>(.*?)<\/b>', info, flags)
        if raw_comments is not None:
            # The captured fragment can contain nested tags; keep only the
            # text found between a '>' and the following '<'.
            # NOTE(review): text outside such tag pairs is dropped - confirm
            # this matches the real page markup.
            num_comments = ''.join(re.findall(r'>(.*?)<', raw_comments, flags))

        avg_price = _first_match(r'<b>￥(.*?)<\/span>', info, flags)

        shop = {}
        fields = (
            ('shop_name', shop_name),
            ('score', score),
            ('num_comments', num_comments),
            ('avg_price', avg_price),
        )
        for key, value in fields:
            if value is None:
                # One missing field must not abort the whole page.
                value = 'null'
            else:
                print(value)
            shop[key] = value
        shops.append(shop)
    return shops


# Parse the page fetched above; get_data() reads the module-level `response`.
get_data()